# Load the tidyverse (ggplot2, dplyr, readr, ...) used throughout this script.
# NOTE: the former `rm(list = ls())` call was removed. It deletes the caller's
# objects without resetting loaded packages or options, so it does not give a
# truly clean session; restart R instead when a clean slate is needed.
library(tidyverse)
## ── Attaching packages ───────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Limit how many entries a single print() call may write to the console.
options(max.print = 1000)

# Read the survey records with an explicit column specification. The types
# mirror what readr previously guessed for this file; declaring them silences
# the "Parsed with column specification" message and means a change in the
# file's layout shows up as parsing problems instead of silently different
# column types.
surveys_complete <- read_csv(
  "data/surveys_complete.csv",
  col_types = cols(
    record_id = col_double(),
    month = col_double(),
    day = col_double(),
    year = col_double(),
    plot_id = col_double(),
    species_id = col_character(),
    sex = col_character(),
    hindfoot_length = col_double(),
    weight = col_double(),
    genus = col_character(),
    species = col_character(),
    taxa = col_character(),
    plot_type = col_character()
  )
)

# Open the data in the viewer for a quick visual check.
view(surveys_complete)

Data Visualization with ggplot2

Basic Template: ggplot(data = <DATA>, mapping = aes(<MAPPINGS>)) + <GEOM_FUNCTION>()

Use the ggplot() function to: (1) bind the plot to a specific data frame with the data argument, (2) define an aesthetic mapping with aes(), and (3) add a geom to the plot with the + operator. Since we have two continuous variables, let's use geom_point(). (The + operator also lets you modify an existing ggplot object.)

# Scatter plot of hindfoot length (y) against weight (x).
ggplot(surveys_complete, aes(x = weight, y = hindfoot_length)) +
  geom_point()

You can also assign the plot to a variable and then draw it by adding layers to that variable:

# Store the base plot (data + aesthetic mapping) so layers can be added later.
surveys_plot <- ggplot(
  surveys_complete,
  aes(x = weight, y = hindfoot_length)
)

# Layers are added with `+`; the `+` goes at the end of a line, never at the
# start of the next one.
surveys_plot + geom_point()

Challenge - Scatter Plots

Using geom_hex() (which requires the hexbin package), the plot area is divided into hexagons, and each hexagon is colored according to the number of observations that fall within its boundaries (hexagonal binning).

# geom_hex() needs the hexbin package for the binning.
library(hexbin)

# Base plot: weight on the x axis, hindfoot length on the y axis.
surveys_plot <- ggplot(surveys_complete, aes(x = weight, y = hindfoot_length))

# Draw hexagonal bins instead of individual points.
surveys_plot + geom_hex()

strength: having the color code for the concentration of data in one area

weakness: because the data are binned into hexagons, you see a condensed version of the data and lose nuances such as individual outliers

Building Plots Iteratively

# Make the points mostly transparent and draw all of them in blue.
ggplot(surveys_complete, aes(x = weight, y = hindfoot_length)) +
  geom_point(colour = "blue", alpha = 0.1)

or we can color each species in the plot differently

# Map species_id to colour inside aes() so each species gets its own colour.
ggplot(surveys_complete, aes(x = weight, y = hindfoot_length)) +
  geom_point(aes(colour = species_id), alpha = 0.1)

Challenge - Iterative Plots

create a scatter plot of weight over species_id with the plot types showing in different colors

# Weight for each species_id, with the points coloured by plot type.
challenge_plot <- ggplot(surveys_complete, aes(x = species_id, y = weight))

challenge_plot + geom_point(aes(colour = plot_type))

Box Plot

visualize the distribution of weight within each species

# One box per species summarising the distribution of weight.
ggplot(surveys_complete, aes(x = species_id, y = weight)) +
  geom_boxplot()

add points to the boxplot to get a better idea of the number of measurements and of their distribution

# Transparent boxes (alpha = 0) with the jittered raw measurements on top.
ggplot(surveys_complete, aes(x = species_id, y = weight)) +
  geom_boxplot(alpha = 0) +
  geom_jitter(colour = "tomato", alpha = 0.3)

Challenge - Box Plots

Unlike a boxplot, a violin plot (sometimes known as a beanplot) shows the shape and density of the distribution.

# Violin outlines plus the jittered raw points, with weight on a log10 axis.
ggplot(surveys_complete, aes(x = species_id, y = weight)) +
  geom_violin(alpha = 0) +
  geom_jitter(colour = "tomato", alpha = 0.3) +
  scale_y_log10()

try exploring other variables

# Hindfoot length per species: jittered raw points, points coloured by the
# numeric plot_id, and red box plots drawn on top.
ggplot(surveys_complete, aes(x = species_id, y = hindfoot_length)) +
  geom_jitter(alpha = 0.1) +
  geom_point(aes(colour = plot_id)) +
  geom_boxplot(colour = "red")

# Treat plot_id as a categorical variable: wrapping it in factor() switches
# the colour scale from a continuous gradient to one discrete colour per plot.
# (The former `c()` around plot_id was a no-op and has been removed, which
# also shortens the default legend title to "factor(plot_id)".)
ggplot(data = surveys_complete,
       mapping = aes(x = species_id, y = hindfoot_length)) +
  geom_jitter(alpha = 0.1, aes(color = factor(plot_id))) +
  geom_boxplot(color = "gray")

After converting plot_id from a number to a factor, ggplot uses a discrete color scale, and the legend on the right-hand side lists each plot ID next to its color.

Plotting time series data

calculate the number of counts per year of each genus

to do this we need to group the data and count the records within each group

# Number of records for every year/genus combination (stored in column `n`).
yearly_counts <- count(surveys_complete, year, genus)

# First attempt at a time series: a single line through all the counts.
ggplot(yearly_counts, aes(x = year, y = n)) +
  geom_line()

# A single line does not work here because the counts of all genera are drawn
# together. Adding the `group` aesthetic tells ggplot to draw one line per
# genus.
ggplot(yearly_counts, aes(x = year, y = n, group = genus)) +
  geom_line()

# Give each genus its own colour.
ggplot(yearly_counts, aes(x = year, y = n, colour = genus)) +
  geom_line()

The pipe operator %>% can also be used to pass the data argument to the ggplot() function.

Note that the plot layers are still added with +, not with the pipe operator.

# The pipe supplies `data`; the layers are still combined with `+`.
yearly_counts %>%
  ggplot(aes(x = year, y = n, colour = genus)) +
  geom_line()

# Chain the data manipulation straight into the plot and keep the result.
yearly_counts_graph <- surveys_complete %>%
  count(year, genus) %>%
  ggplot(aes(x = year, y = n, colour = genus)) +
  geom_line()

# Evaluating the stored object at top level draws it.
yearly_counts_graph

Faceting allows the user to split one plot into multiple plots based on a factor included in the dataset.

we will use it to make a time series plot for each genus

# One panel per genus, each showing that genus' yearly counts.
ggplot(yearly_counts, aes(x = year, y = n)) +
  geom_line() +
  facet_wrap(~ genus)

# Split the line in each panel by the sex of the individuals measured.

# Count the records per year, genus and sex.
yearly_sex_counts <- count(surveys_complete, year, genus, sex)

# Facet by genus and separate the sexes within each panel by colour.
ggplot(yearly_sex_counts, aes(x = year, y = n, colour = sex)) +
  geom_line() +
  facet_wrap(~ genus)

# Grid of panels: one row per sex, one column per genus.
ggplot(yearly_sex_counts, aes(x = year, y = n, colour = sex)) +
  geom_line() +
  facet_grid(sex ~ genus)

# Panels can also be laid out along rows only or along columns only.

# A single column of panels: one row per genus.
ggplot(yearly_sex_counts, aes(x = year, y = n, colour = sex)) +
  geom_line() +
  facet_grid(genus ~ .)

# A single row of panels: one column per genus.
ggplot(yearly_sex_counts, aes(x = year, y = n, colour = sex)) +
  geom_line() +
  facet_grid(. ~ genus)

Every component of a ggplot graph can be customized using the generic theme() function.

change the previous graph to have a simpler white background using the theme_bw() function

 # Same faceted time series, drawn with the black-and-white theme.
 ggplot(yearly_sex_counts, aes(x = year, y = n, colour = sex)) +
   geom_line() +
   facet_wrap(~ genus) +
   theme_bw()

for a complete list of themes visit https://ggplot2.tidyverse.org/reference/ggtheme.html

Challenge - Create a plot

create a plot that depicts how the average weight of each species changes through the years

# Mean weight per species and year. `.groups = "drop"` returns an ungrouped
# tibble: it silences the summarise() regrouping message and avoids leaving a
# residual species_id grouping on the result.
yearly_weight <- surveys_complete %>%
  group_by(species_id, year) %>%
  summarize(avg_weight = mean(weight), .groups = "drop")

# One panel per species showing its average weight across the years.
ggplot(data = yearly_weight, mapping = aes(x = year, y = avg_weight)) +
  geom_line() +
  facet_wrap(vars(species_id)) +
  theme_bw()

customization

# Set the plot title and readable axis names with labs().
ggplot(yearly_sex_counts, aes(x = year, y = n, colour = sex)) +
  geom_line() +
  facet_wrap(~ genus) +
  labs(
    title = "Observed genera through time",
    x = "Year of observation",
    y = "Number of individuals"
  ) +
  theme_bw()

# Enlarge all text by overriding the base `text` element after theme_bw().
ggplot(yearly_sex_counts, aes(x = year, y = n, colour = sex)) +
  geom_line() +
  facet_wrap(~ genus) +
  labs(
    title = "Observed genera through time",
    x = "Year of observation",
    y = "Number of individuals"
  ) +
  theme_bw() +
  theme(text = element_text(size = 16))

# Rotate the x-axis tick labels by 90 degrees so they no longer overlap, and
# set the facet strip labels in italics.
ggplot(yearly_sex_counts, aes(x = year, y = n, colour = sex)) +
  geom_line() +
  facet_wrap(~ genus) +
  labs(
    title = "Observed genera through time",
    x = "Year of observation",
    y = "Number of individuals"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(
      colour = "grey20", size = 12, angle = 90, hjust = 0.5, vjust = 0.5
    ),
    axis.text.y = element_text(colour = "grey20", size = 12),
    strip.text = element_text(face = "italic"),
    text = element_text(size = 16)
  )

# The previous plot also italicised the facet strip labels via `strip.text`.

# Theme settings can be stored in an object and added to any later plot.
grey_theme <- theme(
  axis.text.x = element_text(
    colour = "grey20", size = 12, angle = 90, hjust = 0.5, vjust = 0.5
  ),
  axis.text.y = element_text(colour = "grey20", size = 12),
  text = element_text(size = 16)
)

# Apply the stored theme to a box plot of hindfoot length per species.
ggplot(surveys_complete, aes(x = species_id, y = hindfoot_length)) +
  geom_boxplot() +
  grey_theme

Challenge - Make your own beautiful graph using the ggplot2 cheat sheet

I am going to build on the one-row, facet-by-column graph created earlier

# Starting point: yearly counts per sex, one column of panels per genus.
ggplot(yearly_sex_counts, aes(x = year, y = n, colour = sex)) +
  geom_line() +
  facet_grid(. ~ genus)

# Customized version: step lines instead of straight segments, descriptive
# labels, rotated x-axis tick labels and italic facet strip labels.
# The labels previously misspelled "Individuals", and the title said
# "Species" although the panels are split by genus.
ggplot(data = yearly_sex_counts,
       mapping = aes(x = year, y = n, color = sex)) +
  geom_step() +
  facet_grid(cols = vars(genus)) +
  labs(title = "Individuals of Each Sex Grouped by Genus",
       x = "Year",
       y = "Number of Individuals") +
  theme(axis.text.x = element_text(colour = "grey20", size = 10, angle = 90,
                                   hjust = 0.5, vjust = 0.5),
        axis.text.y = element_text(colour = "grey20", size = 10),
        strip.text = element_text(face = "italic"),
        text = element_text(size = 10))

Arranging and exporting plots

# combine plots into a single figure 

library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
# Box plot of weight per species on a log10 y axis. The y label is a plotmath
# expression so the "10" is rendered as a subscript.
# (A trailing empty `labs()` call was removed; it added nothing to the plot.)
spp_weight_boxplot <- ggplot(data = surveys_complete,
                             aes(x = species_id, y = weight)) +
  geom_boxplot() +
  labs(x = "Species",
       y = expression(log[10](Weight))) +
  scale_y_log10()

# Yearly abundance, one coloured line per genus.
spp_count_plot <- ggplot(
  yearly_counts,
  aes(x = year, y = n, colour = genus)
) +
  geom_line() +
  labs(x = "Year", y = "Abundance")

# Put both plots side by side; the widths are relative (4 : 6).
grid.arrange(
  spp_weight_boxplot, spp_count_plot,
  ncol = 2,
  widths = c(4, 6)
)

# Build the plot to export; its size is set in the ggsave() call below.
my_plot <- ggplot(yearly_sex_counts, aes(x = year, y = n, colour = sex)) +
  geom_line() +
  facet_wrap(~ genus) +
  labs(
    title = "Observed genera through time",
    x = "Year of observation",
    y = "Number of individuals"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(
      colour = "grey20", size = 12, angle = 90, hjust = 0.5, vjust = 0.5
    ),
    axis.text.y = element_text(colour = "grey20", size = 12),
    text = element_text(size = 16)
  )

# Write the plot to disk with an explicit width and height.
ggsave("name_of_file.png", my_plot, width = 15, height = 10)

# ggsave() also works for the arranged figure returned by grid.arrange().
combo_plot <- grid.arrange(spp_weight_boxplot, spp_count_plot, ncol = 2,
                           widths = c(4, 6))

# Give both dimensions explicitly. With `height` omitted, ggsave() falls back
# to the size of the current graphics device, so the saved figure could differ
# between sessions; 5 matches the "10 x 5" size reported by the previous run.
ggsave("combo_plot_abun_weight.png", combo_plot,
       width = 10, height = 5, dpi = 300)